#include <vector>

#include "caffe/layers/deconv_layer.hpp"

namespace caffe {

	// Rebuilds this->output_shape_ from the input shape and the layer's
	// kernel / stride / pad / dilation settings, then stores the first two
	// computed extents in height_out_ and width_out_.
	//
	// For every spatial axis the output extent is
	//   stride * (input_dim - 1) + kernel_extent - 2 * pad
	// where kernel_extent = dilation * (kernel - 1) + 1.
	//
	// NOTE(review): the per-axis tables below hold exactly two entries
	// ({height, width}) and output_shape_[0] / [1] are read unconditionally,
	// so this presumably relies on num_spatial_axes_ == 2 — confirm.
	template<typename Dtype>
	void DeconvolutionLayer<Dtype>::compute_output_shape() {
		// Per-axis settings in {height, width} order.
		const int kernel_sizes[] = {this->kernel_h_, this->kernel_w_};
		const int strides[] = {this->stride_h_, this->stride_w_};
		const int paddings[] = {this->pad_h_, this->pad_w_};
		const int dilations[] = {this->dilation_h_, this->dilation_w_};

		this->output_shape_.clear();
		for (int axis = 0; axis < this->num_spatial_axes_; ++axis) {
			// axis + 1 skips the channel axis of the input shape.
			const int in_extent = this->input_shape(axis + 1);
			const int dilated_kernel = dilations[axis] * (kernel_sizes[axis] - 1) + 1;
			const int strided_span = strides[axis] * (in_extent - 1);
			this->output_shape_.push_back(strided_span + dilated_kernel
										  - 2 * paddings[axis]);
		}

		// Keep the 2-D extents available as individual members.
		this->height_out_ = this->output_shape_[0];
		this->width_out_ = this->output_shape_[1];
	}

	// CPU forward pass.
	//
	// For each bottom/top blob pair, and for each of the num_ samples in it,
	// the sample's output is produced by backward_cpu_gemm (bottom sample and
	// layer weights in, top sample out). When bias_term_ is set, the bias held
	// in blobs_[1] is then applied to that sample via forward_cpu_bias.
	//
	// bottom: input blobs (read through cpu_data()).
	// top:    output blobs (written through mutable_cpu_data()).
	template<typename Dtype>
	void DeconvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype> *> &bottom,
												const vector<Blob<Dtype> *> &top) {
		const Dtype *filters = this->blobs_[0]->cpu_data();
		const int blob_count = static_cast<int>(bottom.size());
		for (int idx = 0; idx < blob_count; ++idx) {
			const Dtype *in_base = bottom[idx]->cpu_data();
			Dtype *out_base = top[idx]->mutable_cpu_data();
			for (int sample = 0; sample < this->num_; ++sample) {
				// Offsets of the current sample inside the bottom / top buffers.
				const Dtype *sample_in = in_base + sample * this->bottom_dim_;
				Dtype *sample_out = out_base + sample * this->top_dim_;

				this->backward_cpu_gemm(sample_in, filters, sample_out);
				if (!this->bias_term_) {
					continue;
				}
				this->forward_cpu_bias(sample_out, this->blobs_[1]->cpu_data());
			}
		}
	}

	// CPU backward pass.
	//
	// For each top/bottom blob pair this computes, as requested:
	//   * the bias gradient (when bias_term_ and param_propagate_down_[1]),
	//     via backward_cpu_bias into blobs_[1]'s diff;
	//   * the weight gradient (when param_propagate_down_[0]), via
	//     weight_cpu_gemm into blobs_[0]'s diff;
	//   * the gradient w.r.t. the bottom data (when propagate_down[i]), via
	//     forward_cpu_gemm into the bottom blob's diff.
	//
	// top:            blobs whose diffs are the incoming gradients.
	// propagate_down: per-bottom flag; true when that bottom needs a gradient.
	// bottom:         blobs providing the forward inputs and receiving diffs.
	template<typename Dtype>
	void DeconvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype> *> &top,
												 const vector<bool> &propagate_down,
												 const vector<Blob<Dtype> *> &bottom) {
		const Dtype *filters = this->blobs_[0]->cpu_data();
		Dtype *filter_grad = this->blobs_[0]->mutable_cpu_diff();
		const int blob_count = static_cast<int>(top.size());
		for (int idx = 0; idx < blob_count; ++idx) {
			const Dtype *out_grad = top[idx]->cpu_diff();
			const Dtype *in_data = bottom[idx]->cpu_data();
			Dtype *in_grad = bottom[idx]->mutable_cpu_diff();

			// Bias gradient, only when the layer has a bias that needs one.
			if (this->bias_term_ && this->param_propagate_down_[1]) {
				Dtype *bias_grad = this->blobs_[1]->mutable_cpu_diff();
				for (int sample = 0; sample < this->num_; ++sample) {
					this->backward_cpu_bias(bias_grad, out_grad + sample * this->top_dim_);
				}
			}

			const bool need_filter_grad = this->param_propagate_down_[0];
			const bool need_input_grad = propagate_down[idx];
			if (!need_filter_grad && !need_input_grad) {
				continue;  // Nothing else to compute for this blob pair.
			}

			for (int sample = 0; sample < this->num_; ++sample) {
				const Dtype *sample_out_grad = out_grad + sample * this->top_dim_;
				// Weight gradient. Diffs are accumulated across samples.
				if (need_filter_grad) {
					this->weight_cpu_gemm(sample_out_grad,
										  in_data + sample * this->bottom_dim_, filter_grad);
				}
				// Gradient w.r.t. the bottom data. The final argument tells
				// forward_cpu_gemm whether the weight-gradient step ran just
				// above, so the column buffer it computed can be reused.
				if (need_input_grad) {
					this->forward_cpu_gemm(sample_out_grad, filters,
										   in_grad + sample * this->bottom_dim_,
										   need_filter_grad);
				}
			}
		}
	}

// NOTE(review): in CPU_ONLY builds STUB_GPU presumably supplies placeholder
// GPU entry points for this layer — confirm against the macro definition.
#ifdef CPU_ONLY
	STUB_GPU(DeconvolutionLayer);
#endif

	// NOTE(review): presumably emits the explicit template instantiations of
	// DeconvolutionLayer for the supported Dtype types — confirm.
	INSTANTIATE_CLASS(DeconvolutionLayer);

	// NOTE(review): presumably registers this layer with the layer factory
	// under the type name "Deconvolution" — confirm.
	REGISTER_LAYER_CLASS(Deconvolution);

}  // namespace caffe
